# use groundhog to make code maximally reproducible
# requireNamespace() only checks whether the package is installed; the
# library() call below is what attaches it.
if (!requireNamespace("groundhog", quietly = TRUE)) {
  install.packages("groundhog")
}
library("groundhog")
# use groundhog to install and load packages
pkgs <- c("here", # System path management
"tidyverse", # ggplot, dplyr, %>%, and friends
"tinytable" # Lightweight package to create tables
)
groundhog.library(pkgs, "2024-07-01") Gorilla to Tidy Data: Cognitive Control and Motivated Reasoning
Load the data
I have data from different versions of the tasks and questionnaires. The following code automatically loads them from the specified path and renames them with the task | questionnaire identifier.
# Define the path to stored raw data
raw_dir <- here("01_data", "raw")
# List all CSV files in the folder.
# `pattern` is a regular expression (not a glob), so match a literal ".csv"
# extension anchored at the end of the file name.
raw_files_list <- list.files(
  path = raw_dir,
  pattern = "\\.csv$",
  full.names = TRUE
)
# Function to read a CSV file and assign it to a dataframe with the desired name
#
# The file name is matched against
# "data_exp_<...>_(questionnaire|task)-<identifier>.csv" and the data frame is
# stored in the global environment as `data_<questionnaire|task>_<identifier>`.
#
# Args:
#   file: Path to a single CSV file.
#
# Returns (invisibly): the filtered data frame, or NULL if the file name does
#   not follow the expected pattern (the file is then skipped with a warning).
read_and_assign_simple <- function(file) {
  # Extract parts of the file name
  file_name <- basename(file)
  parts <- str_match(
    file_name,
    "data_exp_.*_(questionnaire|task)-(.*)\\.csv"
  )

  # str_match() yields an all-NA row when the name does not match; without this
  # guard every such file would be assigned to (and overwrite) `data_NA_NA`.
  if (is.na(parts[1, 1])) {
    warning(
      "Skipping file with unexpected name: ", file_name,
      call. = FALSE
    )
    return(invisible(NULL))
  }

  task_or_questionnaire <- parts[1, 2]
  identifier <- parts[1, 3]

  # Create the new name
  new_name <- paste0("data_", task_or_questionnaire, "_", identifier)

  # Read the CSV file and drop rows where "Participant Private ID" is NA
  df <- read_csv(file) %>%
    filter(!is.na(`Participant Private ID`))

  # Assign the dataframe to the new name in the global environment
  assign(new_name, df, envir = .GlobalEnv)
  invisible(df)
}
# Apply the function to all files
map(raw_files_list, read_and_assign_simple)Data cleaning
In this section, I want to remove any unnecessary rows and columns and rename the dataframes with something more intuitive.
I start by creating a tibble that matches each task/questionnaire identifier with a name.
# Lookup table pairing each task/questionnaire identifier with a readable name
identifier_names <- tibble(
  identifier = c("svnz", "av22", "uamn", "8o8a", "yknh", "nn4b"),
  name = c(
    "consent",
    "questionnaire_pre",
    "go_nogo_m",
    "go_nogo_p",
    "fake_news_game",
    "debrief"
  )
)
identifier_names %>%
tt(theme = "striped")| identifier | name |
|---|---|
| svnz | consent |
| av22 | questionnaire_pre |
| uamn | go_nogo_m |
| 8o8a | go_nogo_p |
| yknh | fake_news_game |
| nn4b | debrief |
Select, filter, and rename variables
Consent (svnz)
data_consent <- data_questionnaire_svnz %>% select(
`Participant Private ID`,
`UTC Date and Time`,
`Experiment Version`,
`Participant Device`,
`Participant Browser`,
`randomiser-evbs`,
`attention_start object-14 Response`:`recontact object-19 Quantised`)data_consent <- data_consent %>%
rename(
consent1 = `Consent Form object-17 I confirm that I have read and understood the information sheet about the study.
-I Agree
`,
consent2 = `Consent Form object-17 I understand that my participation is voluntary and that I am free to withdraw from the study at any time, without giving a reason.
-I Agree
`,
consent3 = `Consent Form object-17 I agree for my (non-identifiable) data to be uploaded into a public depository for scientific purposes.
-I Agree
`,
consent4 = `Consent Form object-17 I agree to take part in this study.
-I Agree
`,
attention_start = `attention_start object-14 Response`,
recontact = `recontact object-19 Response`
)Questionnaires (av22)
data_questionnaires <- data_questionnaire_av22 %>%
select(
`Participant Private ID`,
`age object-18 Month`:`dog11 object-131 Quantised`,
-contains("Quantised"))data_questionnaires <- data_questionnaires %>%
rename(age_month = `age object-18 Month`,
age_year = `age object-18 Year`,
gender = `gender object-6 Response`,
gender_other = `gender object-6 Other`,
worksit = `worksit object-19 Response`,
worksit_other = `worksit object-19 Other`,
education = `education object-11 Response`,
education_other = `education object-11 Other`,
ideology = `ideology object-38 Response`,
partisanship = `partisanship object-23 Response`,
partisanship_other = `partisanship object-23 Other`,
conservative_rating = `conservative object-24 Value`,
labour_rating = `labour object-27 Value`,
libdem_rating = `libdem object-28 Value`,
green_rating = `green object-29 Value`,
reform_rating = `reform object-30 Value`,
crt1 = `CRT1 object-31 Value`,
crt2 = `CRT2 object-33 Value`,
crt3 = `CRT3 object-35 Value`,
o_immigration = `immigration object-91 Response`,
o_climate = `climate object-92 Response`,
o_punishment = `punishment object-93 Response`,
o_teaculture = `teaculture object-94 Response`,
o_brain = `brain object-96 Response`,
attention_check = `attention object-97 Response`,
o_discrimination = `discrimination object-107 Response`,
o_cats = `cats object-108 Response`,
o_selfenhancement = `selfenhancement object-109 Response`,
o_adoption = `adoption object-110 Response`,
o_gender = `gender object-111 Response`,
dog01 = `dog01 object-112 Response`,
dog02 = `dog02 object-113 Response`,
dog03 = `dog03 object-114 Response`,
dog04 = `dog04 object-115 Response`,
dog05 = `dog05 object-116 Response`,
dog06 = `dog06 object-117 Response`,
dog07 = `dog07 object-127 Response`,
dog08 = `dog08 object-128 Response`,
dog09 = `dog09 object-129 Response`,
dog10 = `dog10 object-130 Response`,
dog11 = `dog11 object-131 Response`
)Go / No-Go M Start (uamn)
data_gng_m_sel <- data_task_uamn %>%
select(
`Participant Private ID`,
`UTC Date and Time`,
`Experiment Version`,
`Participant Device`,
`Participant Browser`,
`Task Name`,
`randomiser-evbs`,
`Event Index`,
`Trial Number`,
Screen,
`Component Name`,
`Object Name`,
`Response Type`,
Response,
`Reaction Time`,
Correct,
`Spreadsheet: stimulus`:`Spreadsheet: trial_id`,
Display,
)data_gng_m_sel <- data_gng_m_sel %>%
rename(
response = Response,
rt = `Reaction Time`,
correct = Correct,
stimulus = `Spreadsheet: response`,
letter = `Spreadsheet: stimulus`,
condition = Display,
trial_id = `Spreadsheet: trial_id`,
)data_gng_m <- data_gng_m_sel %>%
filter(`Response Type` == "response")Go / No-Go P Start (8o8a)
data_gng_p_sel <- data_task_8o8a %>%
select(
`Participant Private ID`,
`UTC Date and Time`,
`Experiment Version`,
`Participant Device`,
`Participant Browser`,
`Task Name`,
`randomiser-evbs`,
`Event Index`,
`Trial Number`,
Screen,
`Component Name`,
`Object Name`,
`Response Type`,
Response,
`Reaction Time`,
Correct,
`Spreadsheet: stimulus`:`Spreadsheet: trial_id`,
Display,
)data_gng_p_sel <- data_gng_p_sel %>%
rename(
response = Response,
rt = `Reaction Time`,
correct = Correct,
stimulus = `Spreadsheet: response`,
letter = `Spreadsheet: stimulus`,
condition = Display,
trial_id = `Spreadsheet: trial_id`,
)data_gng_p <- data_gng_p_sel %>%
filter(`Response Type` == "response")Fake News Game (yknh)
data_fake_news_sel <- data_task_yknh %>%
select(
`Participant Private ID`,
`UTC Date and Time`,
`Experiment Version`,
`Participant Device`,
`Participant Browser`,
`Task Name`,
`Task Version`,
`Trial Number`,
Display,
`Response Type`,
`Component Name`,
Screen,
Tag,
`Reaction Time`,
`Spreadsheet: task`,
`Spreadsheet: identifier`,
`Spreadsheet: question`,
`Spreadsheet: name`,
`Spreadsheet: type`,
Response,
`Spreadsheet: message`,
`Spreadsheet: correct_answer`,
Correct,
`Response Duration`,
)data_fake_news_sel <- data_fake_news_sel %>%
rename(rt = `Reaction Time`,
trial_type = `Spreadsheet: task`,
trial_id = `Spreadsheet: identifier`,
question_type = `Spreadsheet: type`,
question_topic = `Spreadsheet: name`,
question = `Spreadsheet: question`,
response = Response,
message = `Spreadsheet: message`,
correct_answer = `Spreadsheet: correct_answer`,
guess_correct = Correct,
response_duration = `Response Duration`
)data_fake_news <- data_fake_news_sel %>%
filter(`Response Type` == "response")Debrief (nn4b)
data_debrief <- data_questionnaire_nn4b %>%
select(`Participant Private ID`,
`attention_end object-3 Response`)data_debrief <- data_debrief %>%
rename(
attention_end = `attention_end object-3 Response`
)Join relevant data frames
# Questionnaire-style data frames to be merged
dfs <- list(data_consent, data_questionnaires, data_debrief)
# Fold the list into one table with successive full joins on the participant ID
data_questionnaires_combined <- dfs %>%
  reduce(full_join, by = "Participant Private ID")
head(data_questionnaires_combined) %>%
tt()| Participant Private ID | UTC Date and Time | Experiment Version | Participant Device | Participant Browser | randomiser-evbs | attention_start | attention_start object-14 Quantised | consent1 | consent2 | consent3 | consent4 | recontact | recontact object-19 Quantised | age_month | age_year | gender | gender_other | worksit | worksit_other | education | education_other | ideology | partisanship | partisanship_other | conservative_rating | labour_rating | libdem_rating | green_rating | reform_rating | crt1 | crt2 | crt3 | o_immigration | o_climate | o_punishment | o_teaculture | o_brain | attention_check | o_discrimination | o_cats | o_selfenhancement | o_adoption | o_gender | dog01 | dog02 | dog03 | dog04 | dog05 | dog06 | dog07 | dog08 | dog09 | dog10 | dog11 | attention_end |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11693545 | 12/09/2024 08:16:23 | 38 | Desktop or Laptop | Chrome 130.0.0.0 | Start P | Yes | 1 | 1 | 1 | 1 | 1 | Yes | 1 | 0 | 39 | male | NA | Employed | NA | Higher secondary or further education (A-levels, T-levels, BTEC, International Baccalaureate or equivalent) | NA | Left | Labour | NA | 3 | 91 | 43 | 84 | 1 | 8 | 10 | 39 | Strongly disagree | Strongly agree | Disagree | Neutral | Neutral | Strongly agree | Strongly agree | Disagree | Agree | Strongly agree | Strongly disagree | Disagree | Strongly agree | Strongly disagree | Strongly agree | Agree | Disagree | Agree | Agree | Strongly agree | Strongly disagree | Disagree | Yes |
| 11693585 | 12/09/2024 08:21:22 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Start M | Yes | 1 | 1 | 1 | 1 | 1 | Yes | 1 | 9 | 36 | female | NA | Employed | NA | Higher secondary or further education (A-levels, T-levels, BTEC, International Baccalaureate or equivalent) | NA | Right | Conservative | NA | 94 | 0 | 0 | 10 | 91 | 8 | 50 | 80 | Neutral | Neutral | Agree | Agree | Neutral | Strongly agree | Neutral | Agree | Agree | Agree | Disagree | Disagree | Agree | Disagree | Agree | Agree | Disagree | Agree | Agree | Agree | Disagree | Neutral | Yes |
| 11693590 | 12/09/2024 08:21:53 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Start P | Yes | 1 | 1 | 1 | 1 | 1 | Yes | 1 | 0 | 32 | male | NA | Self-employed | NA | Bachelors degree (BA, BSc., BEd., BEng.) | NA | Left | Green | NA | 5 | 20 | 30 | 85 | 0 | 4 | 50 | 39 | Strongly disagree | Strongly agree | Strongly disagree | Agree | Strongly agree | Strongly agree | Strongly agree | Strongly agree | Disagree | Strongly agree | Strongly disagree | Disagree | Strongly agree | Strongly disagree | Strongly agree | Strongly agree | Disagree | Strongly agree | Strongly agree | Strongly agree | Strongly disagree | Disagree | Yes |
| 11693627 | 12/09/2024 08:27:29 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Start P | Yes | 1 | 1 | 1 | 1 | 1 | Yes | 1 | 1 | 36 | female | NA | Employed | NA | Bachelors degree (BA, BSc., BEd., BEng.) | NA | Left | Labour | NA | 27 | 89 | 23 | 35 | 2 | 8 | 50 | 20 | Strongly disagree | Strongly agree | Disagree | Neutral | Neutral | Strongly agree | Strongly agree | Agree | Agree | Strongly agree | Strongly disagree | Disagree | Strongly agree | Disagree | Agree | Agree | Disagree | Agree | Strongly agree | Strongly agree | Disagree | Disagree | Yes |
| 11693677 | 12/09/2024 08:39:20 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Start M | Yes | 1 | 1 | 1 | 1 | 1 | Yes | 1 | 6 | 37 | female | NA | Employed | NA | Higher secondary or further education (A-levels, T-levels, BTEC, International Baccalaureate or equivalent) | NA | Right | Reform UK | NA | 0 | 0 | 1 | 0 | 75 | 8 | 50 | 20 | Strongly agree | Neutral | Disagree | Strongly agree | Agree | Strongly agree | Neutral | Agree | Agree | Agree | Disagree | Disagree | Neutral | Strongly disagree | Strongly agree | Strongly agree | Strongly disagree | Strongly agree | Strongly agree | Strongly agree | Disagree | Neutral | Yes |
| 11693689 | 12/09/2024 08:39:33 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Start M | Yes | 1 | 1 | 1 | 1 | 1 | Yes | 1 | 6 | 35 | male | NA | Employed | NA | Bachelors degree (BA, BSc., BEd., BEng.) | NA | Slightly right | Conservative | NA | 60 | 15 | 42 | 12 | 10 | 8 | 2 | 39 | Neutral | Agree | Neutral | Neutral | Disagree | Disagree | Agree | Disagree | Disagree | Disagree | Neutral | Disagree | Agree | Disagree | Agree | Agree | Disagree | Agree | Agree | Agree | Neutral | Neutral | Yes |
data_gng <- bind_rows(data_gng_m, data_gng_p)
head(data_gng) %>%
tt()| Participant Private ID | UTC Date and Time | Experiment Version | Participant Device | Participant Browser | Task Name | randomiser-evbs | Event Index | Trial Number | Screen | Component Name | Object Name | Response Type | response | rt | correct | letter | stimulus | trial_id | condition |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11693585 | 12/09/2024 08:28:20 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Go / No-Go Task (M condition) | Start M | 7 | 1 | Stimulus | Keyboard Response | go | response | go | 450 | 1 | M | go | practice_006 | Practice M |
| 11693585 | 12/09/2024 08:28:21 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Go / No-Go Task (M condition) | Start M | 10 | 2 | Stimulus | Keyboard Response | go | response | go | 500 | 1 | M | go | practice_008 | Practice M |
| 11693585 | 12/09/2024 08:28:23 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Go / No-Go Task (M condition) | Start M | 13 | 3 | Stimulus | Time Limit | screen | response | nogo | 500 | 1 | W | nogo | practice_009 | Practice M |
| 11693585 | 12/09/2024 08:28:24 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Go / No-Go Task (M condition) | Start M | 16 | 4 | Stimulus | Keyboard Response | go | response | go | 392 | 1 | M | go | practice_004 | Practice M |
| 11693585 | 12/09/2024 08:28:26 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Go / No-Go Task (M condition) | Start M | 19 | 5 | Stimulus | Time Limit | screen | response | nogo | 500 | 1 | W | nogo | practice_010 | Practice M |
| 11693585 | 12/09/2024 08:28:27 | 38 | Desktop or Laptop | Chrome 128.0.0.0 | Go / No-Go Task (M condition) | Start M | 22 | 6 | Stimulus | Keyboard Response | go | response | go | 364 | 1 | M | go | practice_007 | Practice M |
Remove participants who failed attention check
As pre-registered, I remove participants who failed a very obvious attention check right at the beginning of the study. One participant had to be removed due to ethical reasons as they incorrectly indicated their age on Prolific.
data_questionnaires %>%
filter(attention_check != "Strongly agree") %>%
select(`Participant Private ID`, age_year, ideology) %>%
tt()| Participant Private ID | age_year | ideology |
|---|---|---|
| 11693689 | 35 | Slightly right |
| 11700313 | 24 | Slightly right |
data_gng_f <- data_gng %>%
filter(!(`Participant Private ID` %in% c(11693689, 11700313)))
data_fake_news_f <- data_fake_news %>%
filter(!(`Participant Private ID` %in% c(11693689, 11700313)))
data_questionnaires_combined_f <- data_questionnaires_combined %>%
filter(!(`Participant Private ID` %in% c(11693689, 11700313)))Check technical errors
data_questionnaires_combined %>%
count(`Participant Private ID`) %>%
filter(n != 1)# A tibble: 0 × 2
# ℹ 2 variables: Participant Private ID <dbl>, n <int>
The questionnaires look good.
data_gng %>%
count(`Participant Private ID`) %>%
filter(n != 300)# A tibble: 2 × 2
`Participant Private ID` n
<dbl> <int>
1 11694557 450
2 11695853 296
For some reason, two IDs do not have 300 observations, which should not happen and is a technical error from Gorilla. As the study was set up, it is not possible to have fewer or more than 300 trials, so this is an indication that there was an error and that the data cannot be used.
data_fake_news %>%
count(`Participant Private ID`) %>%
filter(n > 35 | n < 25)# A tibble: 2 × 2
`Participant Private ID` n
<dbl> <int>
1 11694792 55
2 11696062 55
Another two participants have a weird number of observations and need to be removed. This should not happen and again is a technical issue from Gorilla.
Remove technical errors
This was not pre-registered, as technical errors outside of the control of the researchers cannot be.
data_gng_f <- data_gng_f %>%
filter(!(`Participant Private ID` %in% c(11694557, 11695853,
11694792, 11696062)))
data_fake_news_f <- data_fake_news_f %>%
filter(!(`Participant Private ID` %in% c(11694557, 11695853,
11694792, 11696062)))
data_questionnaires_combined_f <- data_questionnaires_combined_f %>%
filter(!(`Participant Private ID` %in% c(11694557, 11695853,
11694792, 11696062)))Check the age of participants
I intended to recruit only 18-38 year olds - we expect some deviation as this is based on Prolific and not all information is always up to date. Let’s check if there are some participants that incorrectly stated their age.
# age_year is stored as character, so comparing it directly with 18 / 38 would
# be a lexicographic string comparison (e.g. "124" < "18"). Convert to numeric
# first; values that cannot be interpreted as a number (such as "NaN") stay
# flagged via is.na() so they are still listed for manual checking.
data_questionnaires_combined_f %>%
  mutate(age_num = as.numeric(age_year)) %>%
  filter(is.na(age_num) | age_num < 18 | age_num > 38) %>%
  select(`Participant Private ID`, age_year)# A tibble: 7 × 2
`Participant Private ID` age_year
<dbl> <chr>
1 11693545 39
2 11694382 NaN
3 11694592 40
4 11694730 39
5 11694907 39
6 11695000 51
7 11695048 124
I am not super concerned about the participants who are slightly above 38. However, I contacted the others on Prolific. Two of them could correct their age and reported the following ages:
- 11695048 -> 38
- 11694382 -> 38
So let’s create an age variable that takes the age in years for all participants, but uses the corrected values for these two.
data_questionnaires_combined_f <- data_questionnaires_combined_f %>%
mutate(age_year = as.numeric(age_year)) %>%
mutate(age_corrected = case_when(
`Participant Private ID` == 11695048 ~ 38,
`Participant Private ID` == 11694382 ~ 38,
TRUE ~ age_year
)) Demographics of filtered participants
data_questionnaires_combined %>%
filter(`Participant Private ID` %in% c(11693689, 11700313,
11694557, 11695853,
11694792, 11696062)) %>%
select(`Participant Private ID`, `Participant Browser`, ideology,
age_year) %>%
tt()| Participant Private ID | Participant Browser | ideology | age_year |
|---|---|---|---|
| 11693689 | Chrome 128.0.0.0 | Slightly right | 35 |
| 11694557 | Edge 128.0.0.0 | Slightly left | 32 |
| 11694792 | Firefox 130.0 | Slightly left | 33 |
| 11696062 | Chrome 128.0.0.0 | Right | 16 |
| 11700313 | Chrome 128.0.0.0 | Slightly right | 24 |
| 11695853 | Mobile Safari 17.6 | Slightly right | 34 |
This leaves us with the final pre-registered sample size of exactly N = 504 participants.
Save dataframes
Save different .csv data frames for further analyses.
# questionnaire data
data_questionnaires_combined_f %>%
  write_csv(
    here("01_data", "cleaned", "data_questionnaires_cleaned.csv"),
    na = "", append = FALSE, col_names = TRUE
  )
# gng data
data_gng_f %>%
  write_csv(
    here("01_data", "cleaned", "data_gng_cleaned.csv"),
    na = "", append = FALSE, col_names = TRUE
  )
# fake news task data
data_fake_news_f %>%
  write_csv(
    here("01_data", "cleaned", "data_fake_news_cleaned.csv"),
    na = "", append = FALSE, col_names = TRUE
  )